GPS loggers are not the same for successful and failed breeders so they have to be processed differently before being merged. Moreover, data from 2015 and 2018 have different formats and different constraints so they are also processed separately. This file aims at formatting the data and compare results for 2018 when cleaning trips based on different threshold parameters.

Bird data import for 2018

Import data for successful breerders 2018

Loggers deployed on successful breeders are IgotU GPS. They give time in UTC GMT+2 so 3h have to be added to get UTC local time (UTC=GMT+5). They record locations every 5 minutes. They have a duty cycle of 17h on/7h off, providing data from 5am to 22pm UTC. Note that one successful breeder (BAR07,first file listed in directory) was tracked with a GPS-UHF logger and is processed with failed breeders. Two successful breeders (CB9, CC1) failed during tracking, losing their young chick so all data after failure were removed.

library(fields)
library(maptools)
library(maps)
library(stringr)
library(xlsx)
library(plotrix)
library(pastecs)
library(simr)
library(kableExtra)
library(plyr)
library(lme4)
library(afex)
source("R scripts/functions.R")

#Coordinates of the colony and date limit for the tracking study
colony<-data.frame(Longitude =77.52356,Latitude=-37.8545)
date.max<-as.POSIXct("2018-12-26 12:00:00", format="%F %H:%M:%S",tz="UTC")

#import of successful breeders
files.suc<-list.files("Data/RawData/Breeders_2018")
path.suc<-"Data/RawData/Breeders_2018/"
success.2018<-NULL

#Limit data before failure for individuals that have lost chick during tracking
fail.lim<-c(as.POSIXct("2018-12-14 10:00:00", format="%F %H:%M:%S",tz="UTC"),
            as.POSIXct("2018-12-06 12:00:00", format="%F %H:%M:%S",tz="UTC"))

#first file for successful breeder is with GPS-UHF logger so treated in failed breeders loop
for ( a in 2:length(files.suc)){
  
  temp.suc.2018<-read.csv2(paste(path.suc,files.suc[a],sep=""),as.is=T,
                       header=T,sep=",",fill=T)
  temp.suc.2018$DateTime<-paste(temp.suc.2018$Date,temp.suc.2018$Time,sep= " ")
  
  temp.suc.2018$DateTime <-as.POSIXct(strptime(temp.suc.2018$DateTime,format="%Y/%m/%d %H:%M:%S"),
                                  format="%F %H:%M:%S",tz="UTC") 
  temp.suc.2018$DateTime <-temp.suc.2018$DateTime + 3*3600  #get local time
  #extract ID from file name
  temp.suc.2018$ID<- str_sub(files.suc[a],start=11,end=13)
  temp.suc.2018$ID<-as.factor(temp.suc.2018$ID)
  
 
  #for individuals which have failed during tracking, remove dates after failure 
  if(temp.suc.2018$ID[1]=="CB9"){
    temp.suc.2018<-temp.suc.2018[temp.suc.2018$DateTime<fail.lim[1],]}
  if(temp.suc.2018$ID[1]=="CC1"){
    temp.suc.2018<-temp.suc.2018[temp.suc.2018$DateTime<fail.lim[2],]}
  
  temp.suc.2018<-temp.suc.2018[!duplicated(temp.suc.2018$DateTime)==T,]
  temp.suc.2018$Status<-as.factor("success")
  temp.suc.2018$Longitude<-as.numeric(temp.suc.2018$Longitude)
  temp.suc.2018$Latitude<-as.numeric(temp.suc.2018$Latitude)
  
  #distance between location and colony
  temp.suc.2018$Distmax<-NA
  temp.suc.2018$Distmax<-as.vector(rdist.earth(temp.suc.2018[,c("Longitude","Latitude")],
                                           colony[1,c("Longitude","Latitude")],miles=F))
 
  #calculate distance between 2 consecutive points and time elapsed between 2 consecutive points  
  temp.suc.2018$Distadj<-0
  for ( n in 2:nrow(temp.suc.2018)){
    temp.suc.2018$Distadj[n]<-rdist.earth(temp.suc.2018[n,c("Longitude","Latitude")],
                                      temp.suc.2018[n-1,c("Longitude","Latitude")],miles=F)
    
    temp.suc.2018$Difftime<-c(0,difftime(temp.suc.2018$DateTime[2:nrow(temp.suc.2018)],
                                   temp.suc.2018$DateTime[1:nrow(temp.suc.2018)-1],units="min"))
 
  }
  
  temp.suc.2018<-temp.suc.2018[,c("ID","Status","DateTime","Longitude","Latitude",
                                 "Distmax","Difftime","Distadj")]
  
  success.2018<-rbind(success.2018,temp.suc.2018)
}

Import data for failed breerders 2018

Loggers deployed in failed breeders are GPS-UHF Ecotone GPS. They give time in GMT so 5h have to be added to get local time (UTC=GMT+5). Note that two loggers on failed breeders had the same ID HAR01 so trips of 2 individuals have been recorded in the same file with the same ID. They have been separated manually based on locations. One successful breeder (BAR07,first file listed in Non-breeders directory) was tracked with a GPS-UHF logger and is processed with failed breeders.

##Import of failed birds (GPS-UHF)
files.fail<-list.files("Data/RawData/Non_Breeders_2018")
path.fail<-"Data/RawData/Non_Breeders_2018/"
fail.2018<-NULL

for ( a in 1:length(files.fail)){

  temp.fail.2018<-read.csv2(paste(path.fail,files.fail[a],sep=""),as.is=T,
                               header=T,sep=";",fill=T)
  temp.fail.2018$DateTime<-paste(temp.fail.2018$Year,"-", temp.fail.2018$Month,"-",
                           temp.fail.2018$Day, " ", temp.fail.2018$Hour, ":",
                           temp.fail.2018$Minute,":", temp.fail.2018$Second,sep="") 
                           
  temp.fail.2018$DateTime <-as.POSIXct(strptime(temp.fail.2018$DateTime,format="%F %H:%M:%S"),
                                 format="%F %H:%M:%S",tz="UTC") 
   temp.fail.2018$DateTime <- temp.fail.2018$DateTime + 5*3600
       
  temp.fail.2018<-temp.fail.2018[!duplicated(temp.fail.2018$DateTime)==T,]
  
  temp.fail.2018$Longitude<-as.numeric(temp.fail.2018$Longitude)
  temp.fail.2018$Latitude<-as.numeric(temp.fail.2018$Latitude)
  
  #Assign colony coordinates when in range
  temp.fail.2018$Longitude[which(is.na(temp.fail.2018$Longitude)==T & 
                                   temp.fail.2018$In.range==1)]<-colony$Longitude
  temp.fail.2018$Latitude[which(is.na(temp.fail.2018$Latitude)==T & 
                                  temp.fail.2018$In.range==1)]<-colony$Latitude
  temp.fail.2018<-temp.fail.2018[!is.na(temp.fail.2018$Latitude)==T,]
  
  #Discriminate the 2 individuals with same tag ID
  if(files.fail[a]=="HAR01_YNA.csv"){
    idd<-read.xlsx("Data/RawData/HAR01_double_2018.xlsx",
                   sheetIndex=1, header=TRUE,colIndex=c(4,11))
    temp.fail.2018$id<-idd$id
    
    #when "in.range" is true, locations are set to colony coordinates
    temp.fail.2018$Logger.ID<-ifelse(temp.fail.2018$id=="1",temp.fail.2018$Logger.ID<-"HAR11",
                                temp.fail.2018$Logger.ID<-"HAR12")
    
    temp.fail.2018<-temp.fail.2018[order(temp.fail.2018[,2],temp.fail.2018[,1]),-29]
      }
  
  #Discriminate the successful breeder
  temp.fail.2018$Status<-ifelse(temp.fail.2018$Logger.ID=="BAR07", temp.fail.2018$Status<-"success",
                           temp.fail.2018$Status<- "fail")
  temp.fail.2018$Status<-as.factor(temp.fail.2018$Status)
  
  #distance between location and colony
  temp.fail.2018$Distmax<-NA
  temp.fail.2018$Distmax<-as.vector(rdist.earth(temp.fail.2018[,c("Longitude","Latitude")],
                                           colony[1,c("Longitude","Latitude")],miles=F))
  temp.fail.2018$Distmax[temp.fail.2018$In.range==1]<-0
  names(temp.fail.2018)[1]<-"ID"
  temp.fail.2018$ID<-as.factor(temp.fail.2018$ID)
  
 #calculate distance between 2 consecutive points and time elapsed between 2 consecutive points 
    temp.fail.2018$Distadj<-0
  for ( n in 2:nrow(temp.fail.2018)){
    temp.fail.2018$Distadj[n]<-rdist.earth(temp.fail.2018[n,c("Longitude","Latitude")],
                                      temp.fail.2018[n-1,c("Longitude","Latitude")],miles=F)
  temp.fail.2018$Difftime<-c(0,difftime(temp.fail.2018$DateTime[2:nrow(temp.fail.2018)],
                                   temp.fail.2018$DateTime[1:nrow(temp.fail.2018)-1],units="min"))
  
  }
  
  temp.fail.2018<-temp.fail.2018[,c("ID","Status","DateTime","Longitude","Latitude","Distmax","Difftime","Distadj")]

  fail.2018<-rbind(fail.2018,temp.fail.2018)
}

fail.2018$Difftime[which(fail.2018$Difftime<0)]<-0
fail.2018<-subset(fail.2018,fail.2018$DateTime < date.max)

Merge data 2018, define trips and clean trips.

Parameters to define a trip are based on distance to the colony, number of consecutive locations out of the colony and total duration of the trip. Apart from 7h gaps due to duty cycles, no time gaps >10min are recorded in successful breeder tracks (the ones observed in previous plot occur at the colony). On the other side, data for failed breeders have more large time gaps.Therefore, a time gap threshold of 8h is applied on successful breeders. To better determine the effect of time gaps on the selection of trips in failed breeders, different time gap thresholds are tested and compared with the one of failed breeders to homogenize thresholds.

dataset.2018<-rbind(success.2018,fail.2018)
dataset.2018<-dataset.2018[duplicated(dataset.2018$DateTime)==FALSE,] # remove duplicated points with the same date and time
datasetTravelNb<-"NA"
dataset.2018$PathLength<-0

##Parameters to define a trip in 2018
dist.thres<-1             #distance threshold (in km)
last.dist<-10           #distance threshold (in km) when having only one location between 2 trips
row.thres<- 12               #number of rows constituting a trip
dur.thres<-120         #min duration of a trip

#Define trip number within each individual
ALL.2018<-define_trips(dataset.2018)   #function from source file
ALL.2018$TravelID<-paste(ALL.2018$ID,ALL.2018$TravelNb,sep=".")
save(ALL.2018,file="Data/NewlyCreatedData/raw_trips_2018.RData")

#summarize raw trips by individuals or status. There is no time gap thresholds here, just total duration and distance
dist.max.all.trips.2018.status<-raw_trips_summary_status(ALL.2018)  #function from source file

#clean trips and give summary by individuals or status. Several time gap thresholds are tested
fail<-subset(ALL.2018,ALL.2018$Status=="fail")

#10h gaps
diff.thres<-10*60
trips10h<-clean_trips_summary_status(ALL.2018,diff.thres)    #function from source file

#8h gaps
diff.thres<-8*60
trips8h<-clean_trips_summary_status(ALL.2018,diff.thres)    #function from source file

#6h gaps
diff.thres<-6*60
trips6h<-clean_trips_summary_status(fail,diff.thres)    #function from source file

#5h gaps
diff.thres<-5*60
trips5h<-clean_trips_summary_status(fail,diff.thres)    #function from source file

#4h gaps
diff.thres<-4*60
trips4h<-clean_trips_summary_status(fail,diff.thres)    #function from source file

#3h gaps
diff.thres<-3*60
trips3h<-clean_trips_summary_status(fail,diff.thres)    #function from source file

Summary of raw and selected trips by status

The first table shows the summary of trip characteristics with raw data (without any time gap threshold). The others show the summary of trip characteristics with different time gap thresholds. Note that successful breeders are not included in tests <8h because trips have to be retained so that the rest of the trip outside off cycle can be properly interpolated.

Summary of raw trips
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SETotPath MinTotPath MaxTotPath
success 9 19 375.7320 23.63303 232.97788 630.7741 58.20822 3.452602 30.847500 88.54556 1057.648 93.42379 578.37833 2185.241
fail 14 42 392.7061 52.04421 23.53467 1406.2726 90.70038 14.302777 5.453333 413.06778 1063.065 171.15263 59.27427 4775.886
Summary of trips with gaps <10h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 9 18 376.4886 24.97175 232.97788 630.7741 58.73886 3.606683 30.847500 88.54556 1062.657 98.62446 578.37833 2185.241
fail 13 39 381.0538 53.02511 32.59453 1406.2726 88.27746 14.877474 8.966667 413.06778 1082.234 182.07889 64.19188 4775.886
Summary of trips with gaps <8h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
success 9 18 376.4886 24.97175 232.97788 630.7741 58.73886 3.606683 30.847500 88.54556 1062.657 98.62446 578.37833 2185.241
fail 13 38 372.4466 53.71737 32.59453 1406.2726 86.51770 15.167048 8.966667 413.06778 1081.725 186.93445 64.19188 4775.886
Summary trips with gaps <6h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
fail 12 35 370.1589 58.04865 32.59453 1406.273 81.14568 16.08053 8.966667 413.0678 1072.182 199.8138 64.19188 4775.886
Summary trips with gaps <5h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
fail 12 32 381.2155 62.81695 32.59453 1406.273 85.26451 17.42591 8.966667 413.0678 1141.311 214.3825 64.19188 4775.886
Summary trips with gaps <4h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
fail 11 31 386.0655 64.68341 32.59453 1406.273 87.0755 17.89995 8.966667 413.0678 1169.7 219.4634 64.19188 4775.886
Summary trips with gaps <3h
Status NbInd NbTravel MeanDist SEDist MinDist MaxDist MeanDur SEDur MinTripDur MaxTripDur MeanTotPath SEtotPath MinTotPath MaxTotPath
fail 10 23 308.7396 65.74854 32.59453 968.9207 67.6774 19.98415 8.966667 413.0678 998.66 249.7139 64.19188 4775.886

Plots of trips by individuals

In the plots, all black dots represent trips/data that have been excluded. All colored trips are the selected ones.

Interpolation of clean trips

#clean trips with the chosen time gap threshold and give locations of selected trips
diff.thres<- 8*60
clean.trips.suc.2018<-clean_trips_locations(subset(ALL.2018,ALL.2018$Status=="success"),diff.thres)
diff.thres<- 5*60
clean.trips.fail.2018<-clean_trips_locations(fail,diff.thres)

clean.trips.loc.2018<-rbind(clean.trips.suc.2018,clean.trips.fail.2018)

#Number of gaps > 16min
length(which(clean.trips.fail.2018$Difftime>16 ))/nrow(clean.trips.fail.2018)*100
## [1] 11.96503
#Number of gaps > 20min
length(which(clean.trips.loc.2018$Difftime>20))/nrow(clean.trips.loc.2018)*100
## [1] 5.791855
#Number of gaps > 30min
length(which(clean.trips.loc.2018$Difftime>30))/nrow(clean.trips.loc.2018)*100
## [1] 5.369532
 reso <- 15 * 60       #time resolution between 2 points in seconds

new.trip<-NULL
loc.interpolated.2018<-NULL
id<-unique(clean.trips.loc.2018$ID)

 #interpolate data for each individual separately 
for (x in 1:length(id)){
  sub<-subset(clean.trips.loc.2018,clean.trips.loc.2018$ID==id[x])
  trip<-unique(sub$TravelNb)

   #interpolate data for each trip separately 
  for (y in 1:length(trip)){
    sub.trip<-subset(sub,sub$TravelNb==trip[y])
    sub.trip$TimeSinceOrigin<-rep(0,nrow(sub.trip))

  for (z in 1:nrow(sub.trip)){
    sub.trip$TimeSinceOrigin[z]<-difftime(sub.trip$DateTime[z],sub.trip$DateTime[1],units="sec")
  }

 
  # Resampling of long and lat with regular time intervals 
  sub.lat1 <- regul(x=sub.trip$TimeSinceOrigin, y=sub.trip$Latitude,
                    n=round((max(sub.trip$TimeSinceOrigin)/reso),0),
                    deltat=reso, methods="linear",xmin=0,units="sec")
  sub.lon1 <- regul(x=sub.trip$TimeSinceOrigin, y=sub.trip$Longitude,
                    n=round((max(sub.trip$TimeSinceOrigin)/reso),0),
                    deltat=reso, methods="linear",xmin=0,units="sec")

  new.sub <- data.frame(Longitude=sub.lon1[[2]]$Series, Latitude=sub.lat1[[2]]$Series, Time=as.vector(sub.lon1[[1]]),
                        DateTime=sub.trip$DateTime[1]+as.vector(sub.lon1[[1]]),TravelNb=trip[y])

  #Recreate a dataframe with individual and trip characteristics
  new.sub$ID<-id[x]
  new.sub$Status<-rep(unique(sub$Status),nrow(new.sub))
  new.sub$Distmax<-as.vector(rdist.earth(new.sub[,c("Longitude","Latitude")],
                                         colony[1,c("Longitude","Latitude")],miles=F))
  new.sub$Difftime<-c(0,difftime(new.sub$DateTime[2:nrow(new.sub)],new.sub$DateTime[1:nrow(new.sub)-1],units="mins"))
  new.sub$Distadj<-0
  new.sub$TravelID<-paste(new.sub$ID,new.sub$TravelNb,sep=".")


  for (z in 2:nrow(new.sub)){
    new.sub$Distadj[z]<-rdist.earth(new.sub[z,c("Longitude","Latitude")],
                                    new.sub[z-1,c("Longitude","Latitude")], miles=F)
  }

 new.trip<-rbind(new.trip,new.sub)
  }
}

#create a new global dataset with interpolated data
loc.interpolated.2018<-new.trip[,c("ID","Status","TravelNb","TravelID","DateTime","Longitude","Latitude",
                                   "Distmax","Difftime","Distadj")]

save(loc.interpolated.2018,file="Data/NewlyCreatedData/clean_interp_loc_2018.RData")


raw.trips.2018<-raw_trips_summary_ind(ALL.2018)

Statistics

Linear mixed models are applied to test whether the trip variables are different between failed and successful breeders on the raw trips

maxdist<-lmer(sqrt(Distmax) ~ Status + (1|ID),data=raw.trips.2018)
dur<-lmer(sqrt(TripDur) ~ Status + (1|ID),data=raw.trips.2018)
totdist<-lmer(sqrt(TotalPath) ~ Status + (1|ID),data=raw.trips.2018)

anova(maxdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 30.222  30.222     1    59  0.5155 0.4756
anova(dur)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 7.5893  7.5893     1    59  0.4899 0.4867
anova(totdist)
## Type III Analysis of Variance Table with Satterthwaite's method
##        Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Status 186.53  186.53     1    59  0.9328 0.3381

#Power analysis based on Simr package